seed = 65
set.seed(seed)
#print(as.matrix(colnames(big_df)))
col_id <- grep('[(]v[)]', colnames(big_df))
colnames(big_df) <- gsub('[(]v[)]', '', colnames(big_df))
smpl <- denoisingCTF::t_asinh(big_df)
# UMAP
# umap_smp <- umap::umap(smpl[,col_id])
# umap_smp <- as.data.frame(cbind(smpl,
# UMAP1 = umap_smp$layout[,1],
# UMAP2 = umap_smp$layout[,2]))
# save(umap_smp, file = "UMAPcelllines_paper2020.RData")
load("~/Documents/Massion_lab/manuscripts/prelim_cytof_adc/data/cell_lines/processed_files/mix/UMAPcelllines_paper2020.RData")
## TableGrob (4 x 3) "arrange": 12 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 4 4 (2-2,1-1) arrange gtable[layout]
## 5 5 (2-2,2-2) arrange gtable[layout]
## 6 6 (2-2,3-3) arrange gtable[layout]
## 7 7 (3-3,1-1) arrange gtable[layout]
## 8 8 (3-3,2-2) arrange gtable[layout]
## 9 9 (3-3,3-3) arrange gtable[layout]
## 10 10 (4-4,1-1) arrange gtable[layout]
## 11 11 (4-4,2-2) arrange gtable[layout]
## 12 12 (4-4,3-3) arrange gtable[layout]
## Warning: package 'gplots' was built under R version 3.6.2
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
## Fig 2. Clustering cell lines
# find optimal number of clusters (elbow plot)
# DetermineNumberOfClusters(smpl[,col_id],k_max=45,plot=T,smooth=0.2, iter.max=50, seed = 45,
# ask_ft = F,arcsn_tr = F)
# k-means clustering
set.seed(45)
cl_data <- stats::kmeans(smpl[,col_id], centers = 8, iter.max = 50)$cluster
df_cluster <- cbind(smpl[,col_id], 'cluster'= cl_data, "cl_ID"=smpl$cl_ID, umap_smp[c('UMAP1', 'UMAP2')])
# proyect clusters on UMAP
kl <- c("gray60", "#AAD7AC", "#F9C769", "steelblue1", "#B27CC1", "lightskyblue1", "#F2BFB9", "cornflowerblue")
ClusterUMAP_plot(df_cluster, color_by_cluster = T, color_by_protein = F, pallete = F,
cluster_col = 'cluster', plot_clusnames = T, plot_title = 'Cluster ID',
lg_names = levels(as.factor(df_cluster$cluster)),
x_lim = c(-14,11), y_lim = c(-10,14), plot_colors = kl)
# barplot
prcnt_by_cl <- ClassAbundanceByPt(data=df_cluster, ptID_col = 'cluster',
class_col = 'cl_ID')
prcnt_by_cl <- prcnt_by_cl[order(row.names(prcnt_by_cl)),]
#rownames(prcnt_by_cl) <- paste0(rownames(prcnt_by_pt), '_', ref$CANARY)
prcnt_by_cl <- prcnt_by_cl*100
ct <- c("#EF7E33", "#52A02E", "#D7392E", "#9467BD", "#2977B4")
bp_order <- c(4,8,6,1,5,7,2,3)
dendrogram_barplot(prcnt_by_cl, dist_method = 'euclidean',
hclust_method = 'complete', coul = ct, BPOrderAsDendrogram=F,
bp_order=bp_order, xlabl = "Cell type (% of cluster)" , ylabl = "Cluster ID",
cex_names = 1, cex_axis = 1.2,cex_lab = 0.5) #cex.sub and cex.lab do nothing
load("~/Documents/Massion_lab/manuscripts/prelim_cytof_adc/data/cell_lines/processed_files/cell_lines_replicates_gated/cell_lines_val_labeled.RData")
#print(as.matrix(colnames(big_df)))
col_id <- grep('[(]v[)]', colnames(big_df_ctl))
#x <- c(15, 37, 41, 38, 26, 18, 45, 29, 46, 34, 44, 47)
colnames(big_df_ctl) <- gsub('[(]v[)]', '', colnames(big_df_ctl))
df <- denoisingCTF::t_asinh(big_df_ctl[,col_id])
colnames(df) <- gsub(" ","", sapply(strsplit(as.character(colnames(df)), "_"), "[[", 2))
df <- cbind(df, cl_ID=big_df_ctl$cl_ID, sample_ID=big_df_ctl$sample_ID)
df <- melt(df)
## Using cl_ID, sample_ID as id variables
# A549
ggplot(df[which(df$cl_ID=='A549'),], aes(x=value, group=sample_ID, color=sample_ID)) +
geom_density(adjust=1.5) + facet_wrap( ~ variable, scales="free")
# H23
ggplot(df[which(df$cl_ID=='H23'),], aes(x=value, group=sample_ID, color=sample_ID)) +
geom_density(adjust=1.5) + facet_wrap( ~ variable, scales="free")
# H3122
ggplot(df[which(df$cl_ID=='H3122'),], aes(x=value, group=sample_ID, color=sample_ID)) +
geom_density(adjust=1.5) + facet_wrap( ~ variable, scales="free")
# Monocytes
ggplot(df[which(df$cl_ID=='Monocytes'),], aes(x=value, group=sample_ID, color=sample_ID)) +
geom_density(adjust=1.5) + facet_wrap( ~ variable, scales="free")
# PC9
ggplot(df[which(df$cl_ID=='PC9'),], aes(x=value, group=sample_ID, color=sample_ID)) +
geom_density(adjust=1.5) + facet_wrap( ~ variable, scales="free")
# Tc.cells
ggplot(df[which(df$cl_ID=='Tc.cells'),], aes(x=value, group=sample_ID, color=sample_ID)) +
geom_density(adjust=1.5) + facet_wrap( ~ variable, scales="free")
# Th.cells
ggplot(df[which(df$cl_ID=='Th.cells'),], aes(x=value, group=sample_ID, color=sample_ID)) +
geom_density(adjust=1.5) + facet_wrap( ~ variable, scales="free")
umap_smp['subtype4'] <- as.character(umap_smp$subtype)
umap_smp[which(umap_smp$cell_type=='Epithelial'), 'subtype4'] <- 'ECC'
umap_smp$subtype4 <- as.factor(umap_smp$subtype4)
umap_smp$subtype4 <- factor(umap_smp$subtype4, levels = c("ECC", "Endothelial", "Mesenchymal", "Other_immune", "Myeloid", "Tc_cells", "Th_cells"))
## TableGrob (2 x 6) "arrange": 12 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 4 4 (1-1,4-4) arrange gtable[layout]
## 5 5 (1-1,5-5) arrange gtable[layout]
## 6 6 (1-1,6-6) arrange gtable[layout]
## 7 7 (2-2,1-1) arrange gtable[layout]
## 8 8 (2-2,2-2) arrange gtable[layout]
## 9 9 (2-2,3-3) arrange gtable[layout]
## 10 10 (2-2,4-4) arrange gtable[layout]
## 11 11 (2-2,5-5) arrange gtable[layout]
## 12 12 (2-2,6-6) arrange gtable[layout]
## Bootstrap (r = 0.5)... Done.
## Bootstrap (r = 0.6)... Done.
## Bootstrap (r = 0.7)... Done.
## Bootstrap (r = 0.8)... Done.
## Bootstrap (r = 0.9)... Done.
## Bootstrap (r = 1.0)... Done.
## Bootstrap (r = 1.1)... Done.
## Bootstrap (r = 1.2)... Done.
## Bootstrap (r = 1.3)... Done.
## Bootstrap (r = 1.4)... Done.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
##
## Attaching package: 'operators'
## The following objects are masked from 'package:base':
##
## options, strrep
## Warning: package 'dplyr' was built under R version 3.6.2
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:operators':
##
## %>%
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## cell_type p.value FDR_corrected
## 1 Epithelial 0.9142857 0.9142857
## 2 Endothelial 0.1714286 0.6000000
## 3 Mesenchymal 0.4761905 0.6666667
## 4 Other_immune 0.7619048 0.8888889
## 5 Myeloid 0.3523810 0.6166667
## 6 Tc_cells 0.1714286 0.6000000
## 7 Th_cells 0.3523810 0.6166667
## Using pt_ID, CANARY as id variables
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(exp_median[ig1, i], exp_median[ig2, i]): cannot
## compute exact p-value with ties
## Using CANARY, pt_ID as id variables
## Warning in wilcox.test.default(c(2.63407697361086, 0.0695405059165581,
## 2.61931335471328, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.347064923349515, 0.756454263937334,
## 0.461532710617599, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(6.85846430892733e-05, 0.176901427304811, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.604787413934418, 2.00996443199906,
## 0.22811706521791, : cannot compute exact p-value with ties
## Protein p.value FDR_corrected
## 1 141Pr_EpCAM 0.76190476 1.0000000
## 2 142Nd_ccasp3 1.00000000 1.0000000
## 3 144Nd_HLA-ABC 0.06910652 0.6095238
## 4 147Sm_b-CAT 0.10550173 0.6095238
## 5 151Eu_TTF1 0.22976627 0.9190651
## 6 154Sm_CD45 0.60952381 1.0000000
## 7 155Gd_CD56 1.00000000 1.0000000
## 8 156Gd_Vimentin 0.91428571 1.0000000
## 9 158Gd_p-STAT3 0.91428571 1.0000000
## 10 160Gd_MDM2 0.91428571 1.0000000
## 11 161Dy_Cytokeratin 0.91428571 1.0000000
## 12 163Dy_TP63 0.60952381 1.0000000
## 13 164Dy_CK7 1.00000000 1.0000000
## 14 166Er_CD44 0.47619048 1.0000000
## 15 170Yb_CD3 0.76190476 1.0000000
## 16 174Yb_HLA-DR 0.11428571 0.6095238
## Using CANARY, pt_ID as id variables
## Protein p.value FDR_corrected
## 1 144Nd_HLA-ABC 0.1714286 0.7619048
## 2 145Nd_CD31 0.4761905 0.7619048
## 3 146Nd_Thioredoxin 0.3523810 0.7619048
## 4 147Sm_b-CAT 0.4761905 0.7619048
## 5 156Gd_Vimentin 1.0000000 1.0000000
## 6 158Gd_p-STAT3 0.7619048 0.8465608
## 7 160Gd_MDM2 0.6095238 0.7619048
## 8 161Dy_Cytokeratin 0.4761905 0.7619048
## 9 163Dy_TP63 0.6095238 0.7619048
## 10 174Yb_HLA-DR 0.1714286 0.7619048
## Using CANARY, pt_ID as id variables
## Warning in wilcox.test.default(c(0, 0.0958118253303701, 0.284162729407862, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.802649816095183, 0.215492384630114,
## 0.0605647943338007, : cannot compute exact p-value with ties
## Protein p.value FDR_corrected
## 1 141Pr_EpCAM 0.76190476 0.9142857
## 2 142Nd_ccasp3 0.91428571 0.9142857
## 3 155Gd_CD56 0.47619048 0.8571429
## 4 156Gd_Vimentin 0.91428571 0.9142857
## 5 158Gd_p-STAT3 0.11428571 0.5142857
## 6 160Gd_MDM2 0.19945761 0.5983728
## 7 163Dy_TP63 0.47619048 0.8571429
## 8 170Yb_CD3 0.91406196 0.9142857
## 9 174Yb_HLA-DR 0.03809524 0.3428571
## Using CANARY, pt_ID as id variables
## Warning in wilcox.test.default(c(3.06708656196431, 0.066910551904235,
## 3.04431780411568, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.202586121632867, 0.0645635010399363, : cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(c(0.501727573634015, 0.130067040154415,
## 0.712432219756351, : cannot compute exact p-value with ties
## Protein p.value FDR_corrected
## 1 144Nd_HLA-ABC 0.1055017 0.8571429
## 2 154Sm_CD45 0.4761905 0.9523810
## 3 156Gd_Vimentin 0.4761905 0.9523810
## 4 158Gd_p-STAT3 0.7619048 0.9523810
## 5 159Tb_CD4 0.9140620 1.0000000
## 6 163Dy_TP63 0.6095238 0.9523810
## 7 166Er_CD44 1.0000000 1.0000000
## 8 170Yb_CD3 0.7619048 0.9523810
## 9 171Yb_CD11b 0.3314247 0.9523810
## 10 174Yb_HLA-DR 0.1714286 0.8571429
## Using CANARY, pt_ID as id variables
## Protein p.value FDR_corrected
## 1 144Nd_HLA-ABC 0.03174603 0.2539683
## 2 154Sm_CD45 0.90476190 0.9047619
## 3 156Gd_Vimentin 0.55555556 0.8344671
## 4 163Dy_TP63 0.19047619 0.5079365
## 5 166Er_CD44 0.73015873 0.8344671
## 6 168Er_CD8 0.55555556 0.8344671
## 7 170Yb_CD3 0.73015873 0.8344671
## 8 174Yb_HLA-DR 0.11111111 0.4444444
## Using CANARY, pt_ID as id variables
## Warning in wilcox.test.default(c(0, 0.0561621657336121, 0, 0), c(0, 0,
## 1.50456532271426, : cannot compute exact p-value with ties
## Protein p.value FDR_corrected
## 1 144Nd_HLA-ABC 0.6095238 0.9142857
## 2 154Sm_CD45 0.3523810 0.7928571
## 3 156Gd_Vimentin 0.2571429 0.7714286
## 4 159Tb_CD4 0.6095238 0.9142857
## 5 163Dy_TP63 0.7619048 0.9795918
## 6 166Er_CD44 0.1714286 0.7714286
## 7 169Tm_CD24 1.0000000 1.0000000
## 8 170Yb_CD3 0.2571429 0.7714286
## 9 174Yb_HLA-DR 0.9142857 1.0000000
## Using CANARY, pt_ID as id variables
## Warning in wilcox.test.default(c(3.30075534359615, 0.0972935332589382,
## 3.31354462816258, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(1.62386202915046, 0.717296683223608,
## 2.44717234480325, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.748820669157824, 0.123253874437529,
## 1.67192224879611, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.63662474810693, 0.0913139040611759,
## 1.20841095112529, : cannot compute exact p-value with ties
## Protein p.value FDR_corrected
## 1 144Nd_HLA-ABC 0.1645218 0.9523810
## 2 145Nd_CD31 0.4541743 0.9523810
## 3 146Nd_Thioredoxin 0.5929097 0.9846154
## 4 154Sm_CD45 0.4761905 0.9523810
## 5 156Gd_Vimentin 0.4761905 0.9523810
## 6 158Gd_p-STAT3 0.3523810 0.9523810
## 7 159Tb_CD4 0.7461278 0.9846154
## 8 160Gd_MDM2 0.9142857 0.9846154
## 9 163Dy_TP63 1.0000000 1.0000000
## 10 166Er_CD44 0.9142857 0.9846154
## 11 171Yb_CD11b 0.4761905 0.9523810
## 12 172Yb_p-S6 0.9142857 0.9846154
## 13 174Yb_HLA-DR 0.2571429 0.9523810
## 14 175Lu_PD-L1 0.7619048 0.9846154
## Using CANARY, pt_ID as id variables
## Warning in wilcox.test.default(c(2.33079293987923, 0.0582644008491126,
## 1.75087785796523, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(1.63156525887895, 0.322305842850225,
## 1.32203451663395, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(1.13653172069154, 0.0527562183492093,
## 1.50292979733778, : cannot compute exact p-value with ties
## Protein p.value FDR_corrected
## 1 141Pr_EpCAM 0.11428571 0.4114286
## 2 142Nd_ccasp3 0.35238095 0.5714286
## 3 144Nd_HLA-ABC 0.16452182 0.4408163
## 4 147Sm_b-CAT 0.17142857 0.4408163
## 5 148Nd_HER2 0.03809524 0.4114286
## 6 151Eu_TTF1 0.09898793 0.4114286
## 7 154Sm_CD45 0.47619048 0.5714286
## 8 155Gd_CD56 0.76190476 0.8067227
## 9 156Gd_Vimentin 0.47619048 0.5714286
## 10 158Gd_p-STAT3 0.35238095 0.5714286
## 11 160Gd_MDM2 0.91428571 0.9142857
## 12 161Dy_Cytokeratin 0.11428571 0.4114286
## 13 162Dy_MET 0.25714286 0.5142857
## 14 163Dy_TP63 0.47619048 0.5714286
## 15 164Dy_CK7 0.47619048 0.5714286
## 16 165Ho_EGFR 0.10550173 0.4114286
## 17 170Yb_CD3 0.60952381 0.6857143
## 18 174Yb_HLA-DR 0.25714286 0.5142857
ref_samples <- ref
# Check HLA-DR expression in controls
load("/Users/senosam/Documents/Massion_lab/CyTOF_summary/controls/annotated_controls.RData")
ref_clt <- ref[which(ref$CyTOF_date %in% ref_samples$CyTOF_date),]
annot_df_ctl <- annot_df_ctl[which(annot_df_ctl$CyTOF_date %in% ref_samples$CyTOF_date),]
ramos <- annot_df_ctl[which(annot_df_ctl$cell_line=='Ramos'),]
a549 <- annot_df_ctl[which(annot_df_ctl$cell_line=='A549'),]
ramos_median <- aggregate(denoisingCTF::t_asinh(ramos[,c(15, 17:31, 33:35, 37:48, 50:51)]), list(ramos[,'CyTOF_date']), median)
a549_median <- aggregate(denoisingCTF::t_asinh(a549[,c(15, 17:31, 33:35, 37:48, 50:51)]), list(a549[,'CyTOF_date']), median)
a549_median['Cell_line'] <- rep('A549', nrow(a549_median))
ramos_median['Cell_line'] <- rep('Ramos', nrow(ramos_median))
ctl_median <- rbind(a549_median, ramos_median)
ggplot(ctl_median, aes(x=Cell_line, y=`174Yb_HLA-DR`, color = Cell_line)) +
geom_boxplot() +
ylim(0,6)+
#facet_wrap(~variable) +
theme(plot.title = element_text(hjust = 0.5, size=22),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14))
summary(ctl_median$`174Yb_HLA-DR`[1:4])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4604 0.7593 0.8666 0.8193 0.9267 1.0836
summary(ctl_median$`174Yb_HLA-DR`[5:8])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.186 3.244 4.329 3.726 4.812 5.059
umap_smp['subtype'] <- gsub('Epithelial', 'ECCc' ,umap_smp$subtype)
umap_smp$subtype <- as.factor(umap_smp$subtype)
umap_smp$subtype <- factor(umap_smp$subtype, levels = c(paste0("ECCc_", 1:10)))
#umap_smp$subtype4 <- factor(umap_smp$subtype4, levels = c("ECC", "Endothelial", "Mesenchymal", "Other_immune", "Myeloid", "Tc_cells", "Th_cells"))
## Warning: Removed 28 rows containing non-finite values (stat_density2d).
## Warning: Removed 28 rows containing missing values (geom_point).
## Warning: Removed 28 rows containing missing values (geom_point).
## Warning: Removed 28 rows containing missing values (geom_point).
## Warning: Removed 28 rows containing missing values (geom_point).
## Bootstrap (r = 0.5)... Done.
## Bootstrap (r = 0.6)... Done.
## Bootstrap (r = 0.7)... Done.
## Bootstrap (r = 0.8)... Done.
## Bootstrap (r = 0.9)... Done.
## Bootstrap (r = 1.0)... Done.
## Bootstrap (r = 1.1)... Done.
## Bootstrap (r = 1.2)... Done.
## Bootstrap (r = 1.3)... Done.
## Bootstrap (r = 1.4)... Done.
## Warning: Ignoring unknown parameters: label.size
## `geom_smooth()` using formula 'y ~ x'
## Warning: Ignoring unknown parameters: label.size
## `geom_smooth()` using formula 'y ~ x'